Data preparation
import pandas as pd
# Load the cleaned 311 case records; later cells reuse `case_df`.
case_df = pd.read_excel('311_Cases_cleaned.xlsx')

# Unique (neighborhood, year) combinations, kept for reference.
neighborhoods_years = case_df[['neighborhood', 'year']].drop_duplicates()

# Write one workbook per (neighborhood, year) pair.
# A single groupby pass replaces re-filtering the whole table with a boolean
# mask once per pair. sort=False keeps the pairs in order of first appearance,
# which is the order drop_duplicates() + iterrows() produced.
# NOTE: groupby skips rows whose neighborhood or year is missing, so no
# workbook is written for such rows.
for (neighborhood, year), filtered_data in case_df.groupby(['neighborhood', 'year'], sort=False):
    # Define the filename for the new Excel file
    filename = f"{neighborhood}_{year}.xlsx"
    # Save the rows of this pair to the new Excel file
    filtered_data.to_excel(filename, index=False)
    print(f"Data for {neighborhood}, {year} saved to {filename}")
Data for Bayview, 2012 saved to Bayview_2012.xlsx Data for Bayview, 2014 saved to Bayview_2014.xlsx Data for Bayview, 2016 saved to Bayview_2016.xlsx Data for Bayview, 2018 saved to Bayview_2018.xlsx Data for Bernal Heights, 2012 saved to Bernal Heights_2012.xlsx Data for Bernal Heights, 2014 saved to Bernal Heights_2014.xlsx Data for Bernal Heights, 2016 saved to Bernal Heights_2016.xlsx Data for Bernal Heights, 2018 saved to Bernal Heights_2018.xlsx Data for Chinatown, 2012 saved to Chinatown_2012.xlsx Data for Chinatown, 2014 saved to Chinatown_2014.xlsx Data for Chinatown, 2016 saved to Chinatown_2016.xlsx Data for Chinatown, 2018 saved to Chinatown_2018.xlsx Data for Excelsior, 2012 saved to Excelsior_2012.xlsx Data for Excelsior, 2014 saved to Excelsior_2014.xlsx Data for Excelsior, 2016 saved to Excelsior_2016.xlsx Data for Excelsior, 2018 saved to Excelsior_2018.xlsx Data for Haight Ashbury, 2012 saved to Haight Ashbury_2012.xlsx Data for Haight Ashbury, 2014 saved to Haight Ashbury_2014.xlsx Data for Haight Ashbury, 2016 saved to Haight Ashbury_2016.xlsx Data for Haight Ashbury, 2018 saved to Haight Ashbury_2018.xlsx Data for Inner Richmond, 2012 saved to Inner Richmond_2012.xlsx Data for Inner Richmond, 2014 saved to Inner Richmond_2014.xlsx Data for Inner Richmond, 2016 saved to Inner Richmond_2016.xlsx Data for Inner Richmond, 2018 saved to Inner Richmond_2018.xlsx Data for Inner Sunset, 2012 saved to Inner Sunset_2012.xlsx Data for Inner Sunset, 2014 saved to Inner Sunset_2014.xlsx Data for Inner Sunset, 2016 saved to Inner Sunset_2016.xlsx Data for Inner Sunset, 2018 saved to Inner Sunset_2018.xlsx Data for Marina, 2012 saved to Marina_2012.xlsx Data for Marina, 2014 saved to Marina_2014.xlsx Data for Marina, 2016 saved to Marina_2016.xlsx Data for Marina, 2018 saved to Marina_2018.xlsx Data for Mission, 2012 saved to Mission_2012.xlsx Data for Mission, 2014 saved to Mission_2014.xlsx Data for Mission, 2016 saved to Mission_2016.xlsx Data for Mission, 
2018 saved to Mission_2018.xlsx Data for Noe Valley, 2012 saved to Noe Valley_2012.xlsx Data for Noe Valley, 2014 saved to Noe Valley_2014.xlsx Data for Noe Valley, 2016 saved to Noe Valley_2016.xlsx Data for Noe Valley, 2018 saved to Noe Valley_2018.xlsx Data for Outer Mission, 2012 saved to Outer Mission_2012.xlsx Data for Outer Mission, 2014 saved to Outer Mission_2014.xlsx Data for Outer Mission, 2016 saved to Outer Mission_2016.xlsx Data for Outer Mission, 2018 saved to Outer Mission_2018.xlsx Data for Outer Richmond, 2012 saved to Outer Richmond_2012.xlsx Data for Outer Richmond, 2014 saved to Outer Richmond_2014.xlsx Data for Outer Richmond, 2016 saved to Outer Richmond_2016.xlsx Data for Outer Richmond, 2018 saved to Outer Richmond_2018.xlsx Data for Pacific Heights, 2012 saved to Pacific Heights_2012.xlsx Data for Pacific Heights, 2014 saved to Pacific Heights_2014.xlsx Data for Pacific Heights, 2016 saved to Pacific Heights_2016.xlsx Data for Pacific Heights, 2018 saved to Pacific Heights_2018.xlsx Data for Portola, 2012 saved to Portola_2012.xlsx Data for Portola, 2014 saved to Portola_2014.xlsx Data for Portola, 2016 saved to Portola_2016.xlsx Data for Portola, 2018 saved to Portola_2018.xlsx Data for Potrero Hill, 2012 saved to Potrero Hill_2012.xlsx Data for Potrero Hill, 2014 saved to Potrero Hill_2014.xlsx Data for Potrero Hill, 2016 saved to Potrero Hill_2016.xlsx Data for Potrero Hill, 2018 saved to Potrero Hill_2018.xlsx Data for Presidio Heights, 2012 saved to Presidio Heights_2012.xlsx Data for Presidio Heights, 2014 saved to Presidio Heights_2014.xlsx Data for Presidio Heights, 2016 saved to Presidio Heights_2016.xlsx Data for Presidio Heights, 2018 saved to Presidio Heights_2018.xlsx Data for Seacliff, 2012 saved to Seacliff_2012.xlsx Data for Seacliff, 2014 saved to Seacliff_2014.xlsx Data for Seacliff, 2016 saved to Seacliff_2016.xlsx Data for Seacliff, 2018 saved to Seacliff_2018.xlsx Data for South of Market, 2012 saved to South of 
Market_2012.xlsx Data for South of Market, 2014 saved to South of Market_2014.xlsx Data for South of Market, 2016 saved to South of Market_2016.xlsx Data for South of Market, 2018 saved to South of Market_2018.xlsx Data for Visitacion Valley, 2012 saved to Visitacion Valley_2012.xlsx Data for Visitacion Valley, 2014 saved to Visitacion Valley_2014.xlsx Data for Visitacion Valley, 2016 saved to Visitacion Valley_2016.xlsx Data for Visitacion Valley, 2018 saved to Visitacion Valley_2018.xlsx Data for Western Addition, 2012 saved to Western Addition_2012.xlsx Data for Western Addition, 2014 saved to Western Addition_2014.xlsx Data for Western Addition, 2016 saved to Western Addition_2016.xlsx Data for Western Addition, 2018 saved to Western Addition_2018.xlsx
Create a topic model for each neighborhood-year dataset
import numpy as np
import nltk
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
import string
# Neighborhood names found in the master table; they are the filename prefixes.
neighbor = case_df['neighborhood'].unique()
prefixes = neighbor
years = [2012, 2014, 2016, 2018]

# files: key -> full DataFrame, cases: key -> 'case' column as strings,
# keys: every "<neighborhood>_<year>" key in processing order.
files, cases, keys = {}, {}, []

for prefix in prefixes:
    for year in years:
        key = f"{prefix}_{year}"
        filename = f"{prefix}_{year}.xlsx"
        # Read back the workbook written for this neighborhood/year pair.
        df = pd.read_excel(filename, engine='openpyxl')
        tp = df['case'].astype(str)
        files[key] = df
        cases[key] = tp
        keys.append(key)
# tp_cases: key -> Series of stemmed token lists.
# tp_cases_corpus: key -> Series of stop-word-filtered (unstemmed) token lists.
tp_cases = {}
tp_cases_corpus = {}
nltk.download('stopwords')

# Loop-invariant helpers: build them once instead of once per key / per row.
punctuation_table = str.maketrans('', '', string.punctuation)
stop_words = set(stopwords.words('english'))
stemmer = PorterStemmer()

for key in keys:
    # Lowercase the text for consistency and strip punctuation.
    # A dedicated name is used so the master DataFrame `case_df` is not
    # overwritten by a token Series.
    cleaned = cases[key].str.lower().apply(lambda text: text.translate(punctuation_table))
    # Tokenize the text into individual words
    tokens = cleaned.apply(str.split)
    # Remove stopwords
    tokens = tokens.apply(lambda toks: [word for word in toks if word not in stop_words])
    tp_cases_corpus[key] = tokens
    # Stem each word to reduce it to its root form
    tp_cases[key] = tokens.apply(lambda toks: [stemmer.stem(word) for word in toks])
[nltk_data] Downloading package stopwords to /Users/bzb/nltk_data... [nltk_data] Package stopwords is already up-to-date!
# Sanity check: show the first rows of stemmed token lists for the first key.
print(tp_cases[keys[0]].head())
0 [street, sidewalk, clean] 1 [street, sidewalk, clean] 2 [street, sidewalk, clean] 3 [street, sidewalk, clean] 4 [abandon, vehicl] Name: case, dtype: object
Implement topic modeling
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.decomposition import LatentDirichletAllocation
# Build one document-term matrix per key.
# Each key gets its own CountVectorizer, stored in `vectorizers`, so the
# vocabulary that matches each matrix's columns is not lost when the next
# key is fitted. After the loop `vectorizer` still refers to the vectorizer
# of the last key, as it did before.
vectorizers = {}
case_vec = {}
for key in keys:
    vectorizer = CountVectorizer()
    # Join each token list back into one space-separated document.
    documents = tp_cases[key].apply(" ".join)
    case_vec[key] = vectorizer.fit_transform(documents)
    vectorizers[key] = vectorizer
print(case_vec[keys[0]].shape)
(2092, 49)
# For every key, fit LDA with each candidate topic count and keep the count
# with the highest score. The score is computed on the same matrix the model
# was fitted on. topic_num_opt is parallel to `keys`.
topic_num_opt = []
n_topics_range = range(2, 8)
for key in keys:
    log_likelihoods = []
    for n_topics in n_topics_range:
        lda = LatentDirichletAllocation(n_components=n_topics, random_state=0)
        lda.fit(case_vec[key])
        log_likelihoods.append(lda.score(case_vec[key]))
    max_log_likelihood = max(log_likelihoods)
    # Map the best score's position back through the candidate range rather
    # than adding a hard-coded offset, so the result stays correct if
    # n_topics_range is changed. Ties resolve to the smallest topic count.
    best_position = log_likelihoods.index(max_log_likelihood)
    optimal_n_topics = n_topics_range[best_position]
    topic_num_opt.append(optimal_n_topics)
print(topic_num_opt)
[5, 2, 2, 4, 6, 7, 7, 6, 5, 5, 2, 3, 5, 6, 5, 5, 2, 3, 2, 3, 5, 6, 7, 7, 7, 7, 7, 5, 6, 7, 7, 7, 2, 5, 2, 4, 7, 5, 7, 6, 2, 4, 2, 4, 7, 6, 6, 6, 6, 6, 7, 4, 2, 2, 2, 2, 7, 5, 6, 6, 7, 6, 7, 7, 3, 4, 3, 3, 3, 4, 4, 5, 4, 3, 2, 3, 6, 5, 6, 5]
def display_and_get_topics_with_keywords(model, feature_names, num_top_words=10):
    """Print the top words of every topic and return them with their weights.

    Args:
        model: Fitted object exposing ``components_``, iterated as one row of
            feature weights per topic.
        feature_names: Indexable collection mapping a feature column index to
            its word.
        num_top_words: Number of highest-weighted words kept per topic.

    Returns:
        A tuple ``(keyword, topics_dict)``. ``keyword`` is a list holding one
        space-joined string of top words per topic. ``topics_dict`` maps the
        zero-based topic index to a ``{word: weight}`` dict ordered from the
        highest weight down.
    """
    keyword = []
    topics_dict = {}
    for topic_idx, topic in enumerate(model.components_):
        # Column indices ordered by descending weight, cut to the requested size.
        ranked = topic.argsort()[::-1][:num_top_words]
        top_words = [feature_names[col] for col in ranked]
        topics_dict[topic_idx] = dict(zip(top_words, topic[ranked]))
        keyword.append(" ".join(top_words))

    # Topics are displayed with 1-based numbering.
    for number, line in enumerate(keyword, start=1):
        print(f"Topic {number}:")
        print(line)
    return keyword, topics_dict
# Number of top words to show per key (parallel to `keys`). An entry of 0
# yields empty topic dicts, which the later empty-topic check
# (`words[key][0] == {}`) picks up for re-fitting.
no_top_words = [10, 3, 10, 10, 10, 0, 0, 0, 10, 10, 10, 5, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 0, 10, 10, 10, 10, 10, 10, 10, 0, 0, 0, 0, 0, 10, 10, 10, 0, 10, 0, 10, 0, 10, 0, 0, 0, 10, 0, 0, 0, 10, 10, 10, 0, 10, 10, 0, 0, 10, 10, 10, 10, 10, 10, 10, 10, 0, 0, 0, 0, 10, 10, 10, 10, 10, 10, 0, 10]

# w: per-key list of topic keyword strings (parallel to `keys`).
# words: key -> {topic index -> {word: weight}}.
w = []
words = {}
for i, n_topics in enumerate(topic_num_opt):
    key = keys[i]
    # Fit a vectorizer for THIS key so the feature names line up with the
    # columns the model is trained on. The shared `vectorizer` only holds the
    # vocabulary of the key it was fitted on last, which mislabels the topic
    # words of every other key.
    key_vectorizer = CountVectorizer()
    doc_term = key_vectorizer.fit_transform(tp_cases[key].apply(" ".join))

    lda = LatentDirichletAllocation(n_components=n_topics, random_state=42)
    lda.fit(doc_term)

    # get_feature_names() was removed in scikit-learn 1.2; prefer the
    # replacement and fall back only on versions that predate it.
    if hasattr(key_vectorizer, "get_feature_names_out"):
        feature_names = key_vectorizer.get_feature_names_out()
    else:
        feature_names = key_vectorizer.get_feature_names()

    # Display the top words in each topic
    print(f"Topics for {key}")
    keywords, topics_dict = display_and_get_topics_with_keywords(lda, feature_names, no_top_words[i])
    w.append(keywords)
    words[key] = topics_dict
Topics for Bayview_2012 Topic 1: sewer rpd inspect post damag graffiti mta request block clean Topic 2: sfpd abandon issu sfha gener control catch basin request mta Topic 3: public feedback repair nois sidewalk mainten plan encamp concern care Topic 4: request rpd clean block graffiti mta control encamp gener public Topic 5: receptacl illeg public feedback enforc 311 muni curb dph build Topics for Bayview_2014 Topic 1: rpd mocd sidewalk Topic 2: temporari street curb Topics for Bayview_2016 Topic 1: abandon work rpd plan feedback graffiti mta vehicl temporari rec Topic 2: streetlight sign concern homeless mainten sfpd enforc dph street request Topics for Bayview_2018 Topic 1: muni tree mta report sign request litter public block clean Topic 2: rpd illeg extern work rec post repair nois defect puc Topic 3: street sidewalk control feedback post dti build defect receptacl enforc Topic 4: abandon vehicl graffiti park mocd sfha inspect enforc catch damag Topics for Bernal Heights_2012 Topic 1: sewer defect report 311 gener sidewalk extern illeg post issu Topic 2: report homeless public streetlight mta receptacl nois puc enforc anim Topic 3: street abandon sign muni sidewalk issu post extern illeg feedback Topic 4: sfpd dti dph properti muni clean block care litter request Topic 5: report rpd mainten residenti illeg extern sfha repair park rec Topic 6: sfpd sewer curb build sidewalk extern illeg issu post muni Topics for Bernal Heights_2014 Topic 1: Topic 2: Topic 3: Topic 4: Topic 5: Topic 6: Topic 7: Topics for Bernal Heights_2016 Topic 1: Topic 2: Topic 3: Topic 4: Topic 5: Topic 6: Topic 7: Topics for Bernal Heights_2018 Topic 1: Topic 2: Topic 3: Topic 4: Topic 5: Topic 6: Topics for Chinatown_2012 Topic 1: report receptacl catch enforc sewer mta build properti curb block Topic 2: control mocd illeg care basin residenti inspect request dti 311 Topic 3: post puc request inspect dti 311 issu properti build enforc Topic 4: extern homeless park enforc post dph defect 
feedback mainten concern Topic 5: public graffiti litter nois clean anim post enforc inspect request Topics for Chinatown_2014 Topic 1: mta gener issu repair build sewer residenti basin dti dph Topic 2: homeless graffiti rec litter public street plan enforc dph post Topic 3: park defect basin dti dph enforc homeless care receptacl 311 Topic 4: residenti sewer concern muni properti damag 311 feedback receptacl care Topic 5: rec report sfha rpd sfpd puc mainten block clean sidewalk Topics for Chinatown_2016 Topic 1: rpd sfha clean inspect public defect illeg request extern issu Topic 2: feedback receptacl sewer puc curb park damag mta properti dti Topics for Chinatown_2018 Topic 1: issu sewer inspect repair rec Topic 2: properti feedback street graffiti plan Topic 3: streetlight sign clean defect puc Topics for Excelsior_2012 Topic 1: request puc rec mainten sfha muni post curb mta gener Topic 2: abandon sfpd rec gener illeg repair park sidewalk dti plan Topic 3: residenti report concern basin clean block defect curb mainten rpd Topic 4: graffiti rpd defect rec 311 extern build sewer public residenti Topic 5: encamp properti litter damag nois inspect enforc clean basin residenti Topics for Excelsior_2014 Topic 1: gener receptacl street post sfha sewer dti build catch damag Topic 2: graffiti receptacl rpd rec gener encamp enforc extern 311 homeless Topic 3: issu puc litter sidewalk mainten report block clean receptacl gener Topic 4: residenti sewer concern receptacl muni public care illeg repair nois Topic 5: request inspect plan dph defect feedback mta residenti receptacl illeg Topic 6: abandon sign receptacl gener properti mocd illeg care sewer repair Topics for Excelsior_2016 Topic 1: sfpd mta temporari damag block gener nois sewer basin catch Topic 2: mocd report rpd plan repair dph extern defect puc residenti Topic 3: abandon tree rpd homeless street encamp sidewalk request muni receptacl Topic 4: sign sfpd clean illeg dph block concern street extern rpd Topic 5: 
mainten sfha rpd homeless rec work build litter dti graffiti Topics for Excelsior_2018 Topic 1: plan extern abandon tree mocd repair block build report receptacl Topic 2: sign sfpd concern dph clean basin block mta report build Topic 3: rpd homeless vehicl puc sfha mainten mta temporari muni receptacl Topic 4: sfpd damag park residenti gener nois basin clean block dph Topic 5: illeg sidewalk request enforc defect public street care litter properti Topics for Haight Ashbury_2012 Topic 1: post public clean repair abandon properti mta damag inspect extern Topic 2: curb issu enforc muni encamp graffiti dph receptacl feedback park Topics for Haight Ashbury_2014 Topic 1: report receptacl clean rec illeg sewer abandon mta enforc damag Topic 2: gener litter rpd request dti post issu encamp 311 basin Topic 3: curb nois repair public muni graffiti properti feedback plan park Topics for Haight Ashbury_2016 Topic 1: receptacl report clean nois damag repair properti rec illeg sewer Topic 2: feedback graffiti muni dti puc concern gener extern mainten enforc Topics for Haight Ashbury_2018 Topic 1: rpd request clean gener damag plan report inspect curb block Topic 2: receptacl feedback park homeless post sign concern graffiti defect mainten Topic 3: dti encamp muni rec mta mocd extern litter sfpd sidewalk Topics for Inner Richmond_2012 Topic 1: report damag properti homeless rpd muni dti build inspect post Topic 2: litter residenti report feedback 311 sfpd repair homeless curb public Topic 3: sidewalk sfha clean illeg mocd receptacl extern mainten block curb Topic 4: streetlight abandon sign defect report rec park puc graffiti dph Topic 5: mta street catch basin illeg mainten extern mocd receptacl concern Topics for Inner Richmond_2014 Topic 1: repair gener issu rec sign damag mocd tree properti extern Topic 2: sidewalk dph enforc block sfha damag sign concern graffiti encamp Topic 3: abandon temporari inspect residenti mta feedback sfpd receptacl street repair Topic 4: graffiti 
litter plan homeless streetlight basin catch vehicl control nois Topic 5: sfha sidewalk concern enforc block damag sign graffiti encamp mainten Topic 6: defect post repair muni puc public rpd park mainten encamp Topics for Inner Richmond_2016 Topic 1: sidewalk mainten repair report block streetlight sign sfpd residenti litter Topic 2: sfpd sign clean block curb litter sfha rec graffiti street Topic 3: residenti plan receptacl build gener inspect nois request feedback dph Topic 4: graffiti tree abandon rec care control anim enforc 311 litter Topic 5: damag public sewer issu muni extern street basin catch mocd Topic 6: residenti gener puc vehicl sign defect mta dti streetlight rpd Topic 7: illeg properti mocd temporari encamp curb homeless concern basin catch Topics for Inner Richmond_2018 Topic 1: Topic 2: Topic 3: Topic 4: Topic 5: Topic 6: Topic 7: Topics for Inner Sunset_2012 Topic 1: properti gener receptacl damag repair homeless report puc clean enforc Topic 2: puc control receptacl block homeless report clean repair enforc litter Topic 3: clean receptacl puc report homeless repair enforc extern litter dph Topic 4: plan encamp residenti mocd dph build feedback post issu muni Topic 5: rec park illeg encamp plan homeless report receptacl puc clean Topic 6: enforc mainten curb graffiti nois homeless basin catch report receptacl Topic 7: plan abandon request encamp 311 dti inspect mta concern care Topics for Inner Sunset_2014 Topic 1: temporari abandon repair mocd damag clean block mta control sfha Topic 2: mainten sewer sfpd request street residenti extern sidewalk plan sfha Topic 3: nois graffiti build rpd care sidewalk sfha residenti extern plan Topic 4: inspect mta streetlight dph sidewalk extern plan sfha sign concern Topic 5: defect properti sign issu post receptacl park report basin plan Topic 6: sfha sidewalk concern extern build damag plan sign inspect rpd Topic 7: residenti illeg tree public muni puc gener 311 encamp care Topics for Inner Sunset_2016 
Topic 1: request rpd clean block curb feedback graffiti nois receptacl post Topic 2: feedback graffiti nois curb rpd request block clean receptacl post Topic 3: residenti issu sfpd puc receptacl sfha post catch basin extern Topic 4: park damag sidewalk abandon anim control care catch basin extern Topic 5: enforc mainten receptacl extern sign plan curb feedback rpd request Topic 6: rpd defect dti sewer gener concern rec mocd curb feedback Topic 7: receptacl illeg report extern inspect public litter dph build homeless Topics for Inner Sunset_2018 Topic 1: inspect plan block sfha report damag build issu graffiti sign Topic 2: report graffiti temporari properti residenti litter encamp dph mta dti Topic 3: sfpd sewer concern mocd street receptacl sidewalk sfha block dph Topic 4: homeless park enforc abandon streetlight basin catch puc mocd sidewalk Topic 5: muni gener post defect rec mainten illeg control repair nois Topics for Marina_2012 Topic 1: sewer mta properti rec damag request 311 extern rpd enforc Topic 2: rec feedback gener dti care homeless receptacl basin control repair Topic 3: defect park sidewalk plan graffiti nois feedback rec extern 311 Topic 4: request rpd concern 311 extern build rec encamp enforc gener Topic 5: abandon sfpd illeg report inspect public encamp extern 311 rpd Topic 6: issu rpd dph sfha mocd clean block anim catch curb Topics for Marina_2014 Topic 1: plan damag sewer sidewalk repair receptacl illeg park mocd encamp Topic 2: homeless rpd curb enforc park illeg mocd encamp sfha inspect Topic 3: issu residenti abandon street mainten sign basin clean illeg park Topic 4: muni feedback litter puc repair nois public inspect encamp mocd Topic 5: repair graffiti mta dti care extern 311 report request rec Topic 6: repair graffiti sfpd streetlight post defect sfha properti anim catch Topic 7: sfha rpd concern park illeg block mocd encamp inspect sfpd Topics for Marina_2016 Topic 1: temporari abandon inspect sewer damag sfha concern dph sidewalk 
gener Topic 2: request gener feedback mainten streetlight public tree street sfpd mta Topic 3: control graffiti receptacl litter dph sfha sidewalk gener clean mainten Topic 4: sidewalk sfha clean dph block gener mainten encamp concern sfpd Topic 5: defect properti encamp sfpd repair block catch basin mainten care Topic 6: muni extern sign concern damag dph sfha sidewalk gener clean Topic 7: request post homeless rec park 311 enforc report nois dph Topics for Marina_2018 Topic 1: Topic 2: Topic 3: Topic 4: Topic 5: Topic 6: Topic 7: Topics for Mission_2012 Topic 1: Topic 2: Topics for Mission_2014 Topic 1: Topic 2: Topic 3: Topic 4: Topic 5: Topics for Mission_2016 Topic 1: Topic 2: Topics for Mission_2018 Topic 1: Topic 2: Topic 3: Topic 4: Topics for Noe Valley_2012 Topic 1: abandon sewer damag properti report block gener mta request repair Topic 2: request repair clean enforc mainten block damag control residenti gener Topic 3: public extern nois control sfha litter park build mocd plan Topic 4: muni curb feedback block damag enforc mainten residenti control request Topic 5: receptacl homeless residenti inspect post block damag enforc mainten control Topic 6: dti public encamp 311 block damag mainten enforc residenti control Topic 7: issu rpd basin catch defect rec illeg extern public nois Topics for Noe Valley_2014 Topic 1: sidewalk sfha clean sign properti damag defect block illeg enforc Topic 2: sfpd report streetlight repair park rec plan sewer extern dph Topic 3: report homeless muni mainten receptacl puc encamp inspect post feedback Topic 4: abandon tree litter rpd gener nois illeg sign defect properti Topic 5: report homeless vehicl public mocd temporari curb sfha build enforc Topics for Noe Valley_2016 Topic 1: gener request properti vehicl illeg plan sidewalk repair park damag Topic 2: dph litter post rec enforc 311 extern request park sign Topic 3: request gener mainten mta feedback temporari mocd nois puc care Topic 4: graffiti park sign defect sfpd 
block control street request sidewalk Topic 5: issu sewer street request streetlight sidewalk curb homeless damag catch Topic 6: abandon tree sign dti block defect report muni park sfpd Topic 7: sfpd sign control block defect park street request sidewalk dti Topics for Noe Valley_2018 Topic 1: Topic 2: Topic 3: Topic 4: Topic 5: Topic 6: Topics for Outer Mission_2012 Topic 1: mta mainten block litter defect build anim catch clean curb Topic 2: abandon park inspect muni control homeless dph curb catch nois Topics for Outer Mission_2014 Topic 1: Topic 2: Topic 3: Topic 4: Topics for Outer Mission_2016 Topic 1: request rpd clean mainten sfha sewer enforc block puc litter Topic 2: sfpd abandon repair graffiti homeless issu report post sidewalk public Topics for Outer Mission_2018 Topic 1: Topic 2: Topic 3: Topic 4: Topics for Outer Richmond_2012 Topic 1: report 311 extern sewer graffiti properti temporari care request issu Topic 2: report graffiti encamp public dti gener residenti concern illeg mta Topic 3: sfha sidewalk control damag street mocd block illeg concern inspect Topic 4: streetlight abandon mainten rec issu park puc care report graffiti Topic 5: litter rpd post defect mocd clean basin feedback nois inspect Topic 6: sidewalk dph sfpd repair muni graffiti report inspect plan feedback Topic 7: homeless sign enforc anim catch curb build receptacl mta graffiti Topics for Outer Richmond_2014 Topic 1: Topic 2: Topic 3: Topic 4: Topic 5: Topic 6: Topics for Outer Richmond_2016 Topic 1: Topic 2: Topic 3: Topic 4: Topic 5: Topic 6: Topics for Outer Richmond_2018 Topic 1: Topic 2: Topic 3: Topic 4: Topic 5: Topic 6: Topics for Pacific Heights_2012 Topic 1: sfha rpd clean damag block puc extern mocd encamp properti Topic 2: enforc puc homeless report mainten streetlight park plan defect care Topic 3: abandon street inspect sign catch basin extern gener muni damag Topic 4: puc request sewer public dti 311 illeg properti rec care Topic 5: nois curb dph damag puc extern 
rpd mocd encamp sfha Topic 6: sfpd puc mta post control rpd concern feedback repair anim Topics for Pacific Heights_2014 Topic 1: Topic 2: Topic 3: Topic 4: Topic 5: Topic 6: Topics for Pacific Heights_2016 Topic 1: Topic 2: Topic 3: Topic 4: Topic 5: Topic 6: Topic 7: Topics for Pacific Heights_2018 Topic 1: Topic 2: Topic 3: Topic 4: Topics for Portola_2012 Topic 1: issu report inspect muni sign 311 graffiti public temporari mta Topic 2: sfpd sewer control streetlight abandon residenti mocd sidewalk dti feedback Topics for Portola_2014 Topic 1: issu report inspect sfpd mocd residenti mta receptacl dph properti Topic 2: sfha rpd concern street abandon homeless park defect control build Topics for Portola_2016 Topic 1: illeg abandon work rpd homeless mainten sfpd streetlight feedback mocd Topic 2: street sidewalk control graffiti nois encamp dph build feedback streetlight Topics for Portola_2018 Topic 1: Topic 2: Topics for Potrero Hill_2012 Topic 1: sfpd sign clean report sidewalk block curb enforc request sewer Topic 2: request sfha mainten sewer receptacl post puc tree enforc illeg Topic 3: temporari abandon encamp litter rpd concern graffiti enforc build request Topic 4: street inspect dph homeless request illeg enforc sewer mainten sfha Topic 5: repair mta muni streetlight defect sign curb properti issu basin Topic 6: request 311 feedback extern mocd nois illeg enforc sfha mainten Topic 7: illeg request park rec build dti damag public anim care Topics for Potrero Hill_2014 Topic 1: sign sfpd clean mainten repair block defect curb muni residenti Topic 2: feedback park sidewalk report streetlight post request homeless muni residenti Topic 3: request sfha homeless vehicl puc plan receptacl rec dti properti Topic 4: illeg abandon tree curb sfpd muni residenti defect repair mainten Topic 5: sewer litter street mocd temporari enforc damag public nois extern Topics for Potrero Hill_2016 Topic 1: Topic 2: Topic 3: Topic 4: Topic 5: Topic 6: Topics for Potrero 
Hill_2018 Topic 1: Topic 2: Topic 3: Topic 4: Topic 5: Topic 6: Topics for Presidio Heights_2012 Topic 1: extern properti anim build mta block mocd post plan catch Topic 2: nois plan care catch basin post mta park mainten public Topic 3: mocd gener issu dph graffiti post plan catch mta park Topic 4: mocd litter enforc 311 curb block dti defect damag mta Topic 5: encamp muni post clean homeless plan catch mta mainten park Topic 6: public abandon park mainten post plan catch mta basin nois Topic 7: damag mocd concern plan puc illeg feedback inspect control post Topics for Presidio Heights_2014 Topic 1: post rec encamp report rpd mta plan muni block catch Topic 2: public gener mocd concern 311 defect post feedback enforc curb Topic 3: receptacl puc care control basin feedback enforc curb repair post Topic 4: dph issu enforc mainten extern anim build homeless feedback curb Topic 5: repair homeless request clean puc curb feedback block encamp post Topic 6: abandon residenti post encamp inspect park graffiti litter nois illeg Topics for Presidio Heights_2016 Topic 1: issu report request damag properti catch curb anim gener repair Topic 2: graffiti sewer extern mta receptacl muni dph request rpd block Topic 3: mainten sidewalk illeg park litter puc clean basin dph request Topic 4: residenti repair sfpd rec nois public dph request rpd sewer Topic 5: rpd request concern block dph sewer graffiti properti puc litter Topic 6: repair gener street post mocd care inspect encamp enforc 311 Topic 7: defect plan sign abandon sfha build homeless control gener repair Topics for Presidio Heights_2018 Topic 1: abandon sewer concern mta damag extern catch enforc puc clean Topic 2: issu dti request homeless plan anim build illeg enforc puc Topic 3: public repair residenti park post mainten mocd feedback properti litter Topic 4: report control nois encamp public enforc puc clean mainten damag Topic 5: receptacl report care basin clean enforc puc mainten damag illeg Topic 6: public encamp 
enforc sfha muni rec graffiti inspect block curb Topic 7: mainten defect illeg rpd puc block public enforc clean damag Topics for Seacliff_2012 Topic 1: abandon mta mainten feedback homeless defect encamp concern control block Topic 2: illeg enforc inspect encamp extern 311 dph dti mocd graffiti Topic 3: litter issu catch mainten control concern block feedback homeless abandon Topics for Seacliff_2014 Topic 1: gener properti puc catch basin extern inspect enforc abandon rec Topic 2: mta encamp litter receptacl issu plan graffiti mocd public homeless Topic 3: rec abandon enforc control curb feedback nois anim care concern Topic 4: post park clean feedback nois enforc curb control block concern Topics for Seacliff_2016 Topic 1: post encamp properti gener mocd residenti block feedback mta illeg Topic 2: receptacl enforc plan issu mainten extern puc park inspect dph Topic 3: request abandon rec public care concern report homeless clean basin Topics for Seacliff_2018 Topic 1: sewer abandon report receptacl clean damag mta curb block control Topic 2: mainten dph illeg rpd rec graffiti extern request encamp issu Topic 3: public enforc sfha muni control repair inspect post build gener Topics for South of Market_2012 Topic 1: Topic 2: Topic 3: Topics for South of Market_2014 Topic 1: Topic 2: Topic 3: Topic 4: Topics for South of Market_2016 Topic 1: Topic 2: Topic 3: Topic 4: Topics for South of Market_2018 Topic 1: Topic 2: Topic 3: Topic 4: Topic 5: Topics for Visitacion Valley_2012 Topic 1: report receptacl control damag properti repair build defect mta gener Topic 2: illeg plan public extern issu enforc 311 nois catch anim Topic 3: request homeless puc litter park public gener mocd defect mta Topic 4: abandon rpd extern public sewer muni dph inspect residenti encamp Topics for Visitacion Valley_2014 Topic 1: properti puc care report abandon plan extern gener repair clean Topic 2: nois rec dti encamp mta feedback illeg mocd request litter Topic 3: homeless dph puc 
control damag repair gener clean extern plan Topics for Visitacion Valley_2016 Topic 1: sfha rpd catch repair extern enforc plan streetlight damag inspect Topic 2: street abandon homeless request sfpd encamp mainten park curb control Topics for Visitacion Valley_2018 Topic 1: temporari abandon report gener tree properti puc damag post defect Topic 2: sewer sfpd concern mainten dph streetlight clean basin block damag Topic 3: nois enforc graffiti rpd issu feedback mta litter rec encamp Topics for Western Addition_2012 Topic 1: inspect request plan park block clean rec enforc catch damag Topic 2: request sfha post receptacl residenti care park dti streetlight sign Topic 3: abandon temporari mta sewer feedback repair nois graffiti muni park Topic 4: street request 311 homeless dph public sidewalk report defect care Topic 5: sign sfpd curb streetlight park dti build defect request sfha Topic 6: issu request inspect tree puc extern build concern basin illeg Topics for Western Addition_2014 Topic 1: sfpd sewer defect block curb sign sidewalk concern receptacl issu Topic 2: graffiti sfha rec encamp mocd sign receptacl sidewalk gener concern Topic 3: clean sewer sfpd sidewalk sign extern 311 report homeless concern Topic 4: mainten street damag plan puc litter streetlight abandon illeg park Topic 5: receptacl rpd muni feedback gener issu residenti post temporari mta Topics for Western Addition_2016 Topic 1: Topic 2: Topic 3: Topic 4: Topic 5: Topic 6: Topics for Western Addition_2018 Topic 1: request gener public work damag properti litter receptacl temporari sign Topic 2: encamp issu sewer defect street sign repair streetlight homeless concern Topic 3: sidewalk street clean block concern homeless control anim care curb Topic 4: feedback muni post illeg vehicl abandon extern 311 request homeless Topic 5: graffiti park enforc mainten tree rec request curb report nois
Handle neighborhood-year entries whose topics came out empty
# Positions (within `keys`) of the entries whose first topic has no keywords.
empty_positions = [pos for pos, key in enumerate(keys) if not words[key][0]]

# filenames: the affected keys, in `keys` order.
# topic_num: affected key -> topic count previously selected for it.
filenames = [keys[pos] for pos in empty_positions]
topic_num = {keys[pos]: topic_num_opt[pos] for pos in empty_positions}

print(filenames)
print(topic_num)
['Bernal Heights_2014', 'Bernal Heights_2016', 'Bernal Heights_2018', 'Inner Richmond_2018', 'Marina_2018', 'Mission_2012', 'Mission_2014', 'Mission_2016', 'Mission_2018', 'Noe Valley_2018', 'Outer Mission_2014', 'Outer Mission_2018', 'Outer Richmond_2014', 'Outer Richmond_2016', 'Outer Richmond_2018', 'Pacific Heights_2014', 'Pacific Heights_2016', 'Pacific Heights_2018', 'Portola_2018', 'Potrero Hill_2016', 'Potrero Hill_2018', 'South of Market_2012', 'South of Market_2014', 'South of Market_2016', 'South of Market_2018', 'Western Addition_2016']
{'Bernal Heights_2014': 7, 'Bernal Heights_2016': 7, 'Bernal Heights_2018': 6, 'Inner Richmond_2018': 7, 'Marina_2018': 7, 'Mission_2012': 2, 'Mission_2014': 5, 'Mission_2016': 2, 'Mission_2018': 4, 'Noe Valley_2018': 6, 'Outer Mission_2014': 4, 'Outer Mission_2018': 4, 'Outer Richmond_2014': 6, 'Outer Richmond_2016': 6, 'Outer Richmond_2018': 6, 'Pacific Heights_2014': 6, 'Pacific Heights_2016': 7, 'Pacific Heights_2018': 4, 'Portola_2018': 2, 'Potrero Hill_2016': 6, 'Potrero Hill_2018': 6, 'South of Market_2012': 3, 'South of Market_2014': 4, 'South of Market_2016': 4, 'South of Market_2018': 5, 'Western Addition_2016': 6}
# Re-fit an LDA topic model for every neighborhood-year file listed in
# `filenames`, then overwrite the stored keywords (`w`) and topic weights
# (`words`) for that key.

# Loop-invariant helpers: built once instead of once per file (or per row).
punctuation_table = str.maketrans('', '', string.punctuation)
stop_words = set(stopwords.words('english'))
stemmer = PorterStemmer()
no_top_word = 10
# Re-assigning an existing key below never changes dict order, so one snapshot
# of the key order is enough to locate each key's slot in `w`.
keys_list = list(words.keys())

for filename in filenames:
    data = pd.read_excel(f'{filename}.xlsx')
    case1 = data['case'].astype(str)
    # Lowercase the text and strip ASCII punctuation for consistency
    case1 = case1.str.lower().apply(lambda text: text.translate(punctuation_table))
    # Tokenize the text into individual words (whitespace split)
    case1 = case1.apply(lambda text: text.split())
    # Remove stopwords
    case1 = case1.apply(lambda tokens: [word for word in tokens if word not in stop_words])
    # Stem each word to reduce it to its root form
    case1 = case1.apply(lambda tokens: [stemmer.stem(word) for word in tokens])
    # CountVectorizer expects strings, so each token list is passed as its
    # string representation and re-tokenized by the vectorizer.
    vectorizer = CountVectorizer()
    case_vec1 = vectorizer.fit_transform(case1.apply(lambda x: np.str_(x)))
    lda = LatentDirichletAllocation(n_components=topic_num[filename], random_state=42)
    lda.fit(case_vec1)
    # get_feature_names() was deprecated in scikit-learn 1.0 and removed in
    # 1.2; prefer its replacement and fall back only on older releases.
    if hasattr(vectorizer, 'get_feature_names_out'):
        feature_name = list(vectorizer.get_feature_names_out())
    else:
        feature_name = vectorizer.get_feature_names()
    # Display the top words in each topic
    print(f"Topics for {filename}")
    keywords, topics_dict = display_and_get_topics_with_keywords(lda, feature_name, no_top_word)
    # `w` is indexed by the position of the key inside `words`.
    index = keys_list.index(filename)
    w[index] = keywords
    words[filename] = topics_dict
Topics for Bernal Heights_2014 Topic 1: damag properti gener request litter receptacl mta curb puc anim Topic 2: muni feedback illeg post graffiti estat real mainten request tree Topic 3: request sewer issu sfha park rec residenti art commiss moh Topic 4: sidewalk street block librari curb public gener request graffiti estat Topic 5: abandon vehicl request gener public work dph extern 311 basin Topic 6: clean street sidewalk graffiti mainten tree real estat basin catch Topic 7: streetlight defect street sign encamp request temporari repair gener build Topics for Bernal Heights_2016 Topic 1: post illeg block concern homeless catch basin mainten sidewalk street Topic 2: request gener feedback muni sfha public work litter receptacl mta Topic 3: abandon vehicl commiss art mon gener request temporari defect repair Topic 4: street sidewalk clean sign repair defect temporari block request mon Topic 5: mainten tree request rec park curb sidewalk nois report extern Topic 6: streetlight damag properti gener request build inspect anim control care Topic 7: graffiti encamp sewer issu mon temporari defect repair sign sidewalk Topics for Bernal Heights_2018 Topic 1: park enforc rec request streetlight nois report sfpd defect sewer Topic 2: graffiti request gener work public curb litter receptacl extern 311 Topic 3: encamp request gener feedback muni damag properti mta concern homeless Topic 4: sidewalk street clean sewer issu defect block dpw program volunt Topic 5: abandon vehicl repair sign build inspect residenti sfpd request gener Topic 6: mainten tree illeg post estat real depart basin catch request Topics for Inner Richmond_2018 Topic 1: muni feedback sewer issu block receptacl litter curb streetlight color Topic 2: request gener public work rec park librari plan sfpd fire Topic 3: mainten tree request gener mta dph sign temporari puc basin Topic 4: sidewalk street clean illeg post block curb sfpd depart fire Topic 5: park enforc abandon vehicl repair sign build inspect 
residenti anim Topic 6: encamp street defect nois report homeless concern short term rental Topic 7: graffiti properti damag post illeg sfpd street sidewalk clean fire Topics for Marina_2018 Topic 1: litter receptacl block concern homeless sidewalk street central shop defect Topic 2: mainten tree catch basin central shop defect streetlight encamp sidewalk Topic 3: park enforc encamp sewer issu illeg post streetlight central shop Topic 4: request gener public work rec park sign temporari mta repair Topic 5: graffiti abandon vehicl properti damag curb color term short rental Topic 6: feedback muni nois report central shop defect streetlight encamp sidewalk Topic 7: street sidewalk clean defect shop central block curb gener request Topics for Mission_2012 Topic 1: street sidewalk clean request gener litter receptacl public work extern Topic 2: graffiti properti damag mainten encamp abandon vehicl tree issu sewer Topics for Mission_2014 Topic 1: encamp sign sidewalk curb repair receptacl litter post illeg request Topic 2: graffiti muni feedback streetlight defect sidewalk street block clean encamp Topic 3: request gener abandon vehicl work public build mta dph park Topic 4: street sidewalk clean streetlight block defect graffiti encamp feedback muni Topic 5: damag properti issu sewer mainten tree catch basin streetlight defect Topics for Mission_2016 Topic 1: graffiti encamp concern homeless sign repair litter receptacl illeg post Topic 2: sidewalk street clean request gener mainten tree damag properti feedback Topics for Mission_2018 Topic 1: request gener work public mainten tree abandon vehicl feedback muni Topic 2: park enforc curb request receptacl litter rec sewer issu streetlight Topic 3: sidewalk street clean homeless concern damag properti catch basin block Topic 4: encamp graffiti sign repair inspect build request gener catch basin Topics for Noe Valley_2018 Topic 1: abandon vehicl streetlight illeg post catch basin mainten color defect Topic 2: request gener 
properti damag nois report sign temporari mta rec Topic 3: park enforc sewer issu mainten tree defect extern 311 basin Topic 4: street sidewalk clean block curb color defect park streetlight mainten Topic 5: muni feedback graffiti encamp litter receptacl color defect curb street Topic 6: request gener work public repair sign puc build inspect plan Topics for Outer Mission_2014 Topic 1: street sidewalk clean abandon vehicl graffiti issu sewer defect block Topic 2: streetlight litter receptacl mainten tree feedback muni inspect build basin Topic 3: request gener rec park public work sign dph repair control Topic 4: encamp sidewalk curb properti damag mta 311 extern illeg post Topics for Outer Mission_2018 Topic 1: vehicl abandon illeg post receptacl litter residenti catch basin term Topic 2: sidewalk street clean defect properti damag report nois block curb Topic 3: park enforc graffiti encamp mainten tree sewer issu curb feedback Topic 4: request gener work public sign repair streetlight mta rec build Topics for Outer Richmond_2014 Topic 1: graffiti sidewalk curb request gener puc park rec build basin Topic 2: litter receptacl mainten tree request sfha basin catch plan encamp Topic 3: street sidewalk clean damag properti defect encamp post illeg plan Topic 4: sewer issu feedback muni streetlight dti human agenc servic request Topic 5: abandon vehicl gener request work public dph mta care anim Topic 6: sign repair request 311 extern block temporari sidewalk street plan Topics for Outer Richmond_2016 Topic 1: request build inspect sign temporari residenti sfha depart fire assessor Topic 2: street sidewalk clean inspect build block graffiti streetlight encamp gener Topic 3: vehicl abandon graffiti gener work public rec park request commiss Topic 4: receptacl litter mainten tree block extern 311 catch basin dti Topic 5: curb sidewalk gener properti damag request repair sign encamp dph Topic 6: sewer issu feedback muni illeg post defect street streetlight concern Topics 
for Outer Richmond_2018 Topic 1: mainten tree receptacl litter request rec park residenti catch basin Topic 2: muni feedback illeg post curb sign repair puc temporari request Topic 3: request gener properti damag concern homeless defect streetlight mta dph Topic 4: park enforc vehicl abandon issu sewer encamp sidewalk street block Topic 5: graffiti request gener work public nois report basin catch extern Topic 6: sidewalk street clean block defect curb encamp graffiti park sewer Topics for Pacific Heights_2014 Topic 1: litter receptacl mainten tree post illeg catch basin request curb Topic 2: request sign temporari graffiti park rec curb repair residenti build Topic 3: properti damag streetlight defect street encamp request curb graffiti mon Topic 4: gener request sewer issu public work mta puc build inspect Topic 5: request sfha vehicl abandon muni feedback 311 extern dph gener Topic 6: sidewalk street clean block curb request graffiti encamp streetlight mon Topics for Pacific Heights_2016 Topic 1: streetlight defect graffiti curb damag properti encamp street sidewalk block Topic 2: report nois control care anim central shop request gener defect Topic 3: mainten tree homeless concern litter receptacl build inspect catch basin Topic 4: sidewalk street clean properti damag curb block defect graffiti encamp Topic 5: illeg post vehicl abandon sign repair defect graffiti curb properti Topic 6: request feedback muni sfha temporari encamp sign rec park extern Topic 7: graffiti gener request sewer issu public work defect mta dph Topics for Pacific Heights_2018 Topic 1: request gener sign public work temporari vehicl abandon concern homeless Topic 2: feedback muni mainten tree issu sewer build dph inspect residenti Topic 3: park enforc encamp rec request defect illeg post repair streetlight Topic 4: street sidewalk clean graffiti damag properti block litter receptacl curb Topics for Portola_2018 Topic 1: graffiti park enforc abandon vehicl request gener feedback muni 
public Topic 2: street sidewalk clean curb post illeg block defect streetlight encamp Topics for Potrero Hill_2016 Topic 1: sidewalk mainten tree curb block nois report catch basin street Topic 2: encamp request park rec vehicl abandon basin catch residenti build Topic 3: sign repair feedback muni receptacl litter defect damag properti anim Topic 4: request sfha concern homeless gener issu sewer work public streetlight Topic 5: street sidewalk clean defect block streetlight encamp request graffiti build Topic 6: graffiti illeg post build inspect supervisor board gener request streetlight Topics for Potrero Hill_2018 Topic 1: request gener work public sfha mta litter receptacl dph catch Topic 2: graffiti mainten illeg post tree rental term short catch basin Topic 3: encamp request rec park homeless concern defect streetlight residenti build Topic 4: park enforc sign repair nois report temporari request color shop Topic 5: vehicl abandon curb sidewalk feedback muni damag properti build inspect Topic 6: street sidewalk clean issu sewer block extern 311 central shop Topics for South of Market_2012 Topic 1: request graffiti gener litter receptacl streetlight mainten sfha tree work Topic 2: street sidewalk clean encamp abandon vehicl curb block repair sign Topic 3: properti damag extern 311 request issu sewer defect illeg post Topics for South of Market_2014 Topic 1: streetlight issu sewer vehicl abandon request rec park shop central Topic 2: graffiti sidewalk curb encamp block sign repair gener request dph Topic 3: damag properti feedback muni mainten tree receptacl litter basin catch Topic 4: street sidewalk clean request gener defect temporari sign work public Topics for South of Market_2016 Topic 1: muni feedback sign repair block abandon vehicl build residenti inspect Topic 2: street sidewalk clean graffiti damag properti issu sewer curb report Topic 3: encamp request gener defect temporari sign work public mta illeg Topic 4: concern homeless streetlight litter 
receptacl mainten tree basin catch control Topics for South of Market_2018 Topic 1: clean sidewalk street feedback muni litter receptacl mainten tree basin Topic 2: park enforc report nois illeg post rec puc attorney district Topic 3: graffiti homeless concern request sign gener issu sewer temporari mta Topic 4: properti damag street sidewalk block defect curb anim care control Topic 5: encamp request gener public work abandon vehicl depart fire plan Topics for Western Addition_2016 Topic 1: request sfha feedback muni sign temporari repair gener puc dph Topic 2: request gener work public abandon vehicl sewer issu receptacl litter Topic 3: damag properti street defect catch basin mainten extern 311 curb Topic 4: mainten tree encamp streetlight mocd basin catch request gener extern Topic 5: sidewalk street clean rec park curb block 311 extern request Topic 6: graffiti illeg post report nois 311 extern curb park rec
Create a word cloud for each topic
from wordcloud import WordCloud
import matplotlib.pyplot as plt
def generate_wordclouds(topics_dict, k):
    """Render, save and show one word cloud per non-empty topic.

    Args:
        topics_dict: Mapping of zero-based topic id to the ``{word: weight}``
            mapping passed to ``WordCloud.generate_from_frequencies``. Topics
            whose mapping is empty are skipped.
        k: Key of the form ``"<neighborhood>_<year>"``. The part before the
            last underscore names the output folder and the whole key
            prefixes each image file name.

    Each image is written to
    ``<neighborhood>/wordcloud/<k>_<topic number>.png`` with 1-based topic
    numbers; the folder is created when it does not exist yet.
    """
    import os

    # Split on the last underscore only, so a neighborhood name that itself
    # contains an underscore is not truncated.
    neighborhood = k.rsplit('_', 1)[0]
    output_dir = f'{neighborhood}/wordcloud'
    for topic_id, weights in topics_dict.items():
        if not weights:
            # Nothing to draw for a topic without word weights.
            continue
        # Created lazily so that keys with only empty topics leave no folder.
        os.makedirs(output_dir, exist_ok=True)
        wordcloud = WordCloud(width=800, height=400, background_color='white').generate_from_frequencies(weights)
        figure = plt.figure(figsize=(10, 5))
        plt.imshow(wordcloud, interpolation='bilinear')
        plt.title(f'Topic {topic_id + 1}')
        plt.axis("off")
        plt.savefig(f'{output_dir}/{k}_{topic_id + 1}.png')
        plt.show()
        # Release the figure; otherwise every topic keeps one open in memory.
        plt.close(figure)
# Walk every neighborhood-year key in order, announce it, then draw the word
# clouds for the topics stored under that key.
for key in keys:
    print(f'wordcloud for {key}')
    key_topics = words[key]
    generate_wordclouds(key_topics, key)
wordcloud for Bayview_2012
wordcloud for Bayview_2014
wordcloud for Bayview_2016
wordcloud for Bayview_2018
wordcloud for Bernal Heights_2012
wordcloud for Bernal Heights_2014
wordcloud for Bernal Heights_2016
wordcloud for Bernal Heights_2018
wordcloud for Chinatown_2012
wordcloud for Chinatown_2014
wordcloud for Chinatown_2016
wordcloud for Chinatown_2018
wordcloud for Excelsior_2012
wordcloud for Excelsior_2014
wordcloud for Excelsior_2016
wordcloud for Excelsior_2018
wordcloud for Haight Ashbury_2012
wordcloud for Haight Ashbury_2014
wordcloud for Haight Ashbury_2016
wordcloud for Haight Ashbury_2018
wordcloud for Inner Richmond_2012
wordcloud for Inner Richmond_2014
wordcloud for Inner Richmond_2016
wordcloud for Inner Richmond_2018
wordcloud for Inner Sunset_2012
wordcloud for Inner Sunset_2014
wordcloud for Inner Sunset_2016
wordcloud for Inner Sunset_2018
wordcloud for Marina_2012
wordcloud for Marina_2014
wordcloud for Marina_2016
wordcloud for Marina_2018
wordcloud for Mission_2012
wordcloud for Mission_2014
wordcloud for Mission_2016
wordcloud for Mission_2018
wordcloud for Noe Valley_2012
wordcloud for Noe Valley_2014
wordcloud for Noe Valley_2016
wordcloud for Noe Valley_2018
wordcloud for Outer Mission_2012
wordcloud for Outer Mission_2014
wordcloud for Outer Mission_2016
wordcloud for Outer Mission_2018
wordcloud for Outer Richmond_2012
wordcloud for Outer Richmond_2014
wordcloud for Outer Richmond_2016
wordcloud for Outer Richmond_2018
wordcloud for Pacific Heights_2012
wordcloud for Pacific Heights_2014
wordcloud for Pacific Heights_2016
wordcloud for Pacific Heights_2018
wordcloud for Portola_2012
wordcloud for Portola_2014
wordcloud for Portola_2016
wordcloud for Portola_2018
wordcloud for Potrero Hill_2012
wordcloud for Potrero Hill_2014
wordcloud for Potrero Hill_2016
wordcloud for Potrero Hill_2018
wordcloud for Presidio Heights_2012
wordcloud for Presidio Heights_2014
wordcloud for Presidio Heights_2016
wordcloud for Presidio Heights_2018
wordcloud for Seacliff_2012
wordcloud for Seacliff_2014
wordcloud for Seacliff_2016
wordcloud for Seacliff_2018
wordcloud for South of Market_2012
wordcloud for South of Market_2014
wordcloud for South of Market_2016
wordcloud for South of Market_2018
wordcloud for Visitacion Valley_2012
wordcloud for Visitacion Valley_2014
wordcloud for Visitacion Valley_2016
wordcloud for Visitacion Valley_2018
wordcloud for Western Addition_2012
wordcloud for Western Addition_2014
wordcloud for Western Addition_2016
wordcloud for Western Addition_2018
# Build one row per (neighborhood-year, topic) and export the table to Excel.
neighborhood = []
year = []
num = []
topic_id = []
# topic_num_opt is read by position in `keys`, so both are walked together.
for i, key in enumerate(keys):
    n_topics = topic_num_opt[i]
    # Keys look like "<neighborhood>_<year>"; split once, on the last
    # underscore, so an underscore inside the neighborhood name is preserved.
    area, period = key.rsplit('_', 1)
    neighborhood.extend([area] * n_topics)
    year.extend([period] * n_topics)
    num.extend([n_topics] * n_topics)
    # Topic numbers are 1-based within each key.
    topic_id.extend(range(1, n_topics + 1))

# Keywords and weights are flattened in their stored order; they are assumed
# to follow the same key order as `keys` -- TODO confirm against where `w` and
# `words` are first populated.
w_flat_list = [item for sublist in w for item in sublist]
weights = [
    topic_weights
    for topics in words.values()
    for topic_weights in topics.values()
]

# Every column is a plain list, so a length mismatch between the sources
# raises here instead of being index-aligned into silently missing cells.
trend = pd.DataFrame({
    'neighborhood': neighborhood,
    'year': year,
    'tp_number': num,
    'topic': topic_id,
    'keyword': w_flat_list,
    'weight': weights,
})
trend.to_excel('Trend based on neighbors.xlsx')